{
 "cells": [
  {
   "cell_type": "code",
   "execution_count": 340,
   "metadata": {},
   "outputs": [],
   "source": [
    "#import and create sparksession object\n",
    "from pyspark.sql import SparkSession \n",
    "spark=SparkSession.builder.appName('rc').getOrCreate()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 341,
   "metadata": {},
   "outputs": [],
   "source": [
    "#import the required functions and libraries\n",
    "from pyspark.sql.functions import *"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 342,
   "metadata": {},
   "outputs": [],
   "source": [
    "#load the dataset and create sprk dataframe\n",
    "df=spark.read.csv('movie_ratings_df.csv',inferSchema=True,header=True)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 343,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "(100000, 3)\n"
     ]
    }
   ],
   "source": [
    "#validate the shape of the data \n",
    "print((df.count(),len(df.columns)))"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 344,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "root\n",
      " |-- userId: integer (nullable = true)\n",
      " |-- title: string (nullable = true)\n",
      " |-- rating: integer (nullable = true)\n",
      "\n"
     ]
    }
   ],
   "source": [
    "#check columns in dataframe\n",
    "df.printSchema()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 346,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "+------+-----------------------------+------+\n",
      "|userId|title                        |rating|\n",
      "+------+-----------------------------+------+\n",
      "|840   |Amistad (1997)               |4     |\n",
      "|711   |Grand Day Out, A (1992)      |5     |\n",
      "|311   |Casper (1995)                |2     |\n",
      "|717   |Ulee's Gold (1997)           |4     |\n",
      "|389   |Miracle on 34th Street (1994)|5     |\n",
      "|416   |Cool Runnings (1993)         |3     |\n",
      "|449   |Withnail and I (1987)        |5     |\n",
      "|796   |To Kill a Mockingbird (1962) |4     |\n",
      "|658   |Toy Story (1995)             |4     |\n",
      "|345   |Shining, The (1980)          |4     |\n",
      "+------+-----------------------------+------+\n",
      "only showing top 10 rows\n",
      "\n"
     ]
    }
   ],
   "source": [
    "#validate few rows of dataframe in random order\n",
    "df.orderBy(rand()).show(10,False)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 347,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "+------+-----+\n",
      "|userId|count|\n",
      "+------+-----+\n",
      "|405   |737  |\n",
      "|655   |685  |\n",
      "|13    |636  |\n",
      "|450   |540  |\n",
      "|276   |518  |\n",
      "|416   |493  |\n",
      "|537   |490  |\n",
      "|303   |484  |\n",
      "|234   |480  |\n",
      "|393   |448  |\n",
      "+------+-----+\n",
      "only showing top 10 rows\n",
      "\n"
     ]
    }
   ],
   "source": [
    "#check number of ratings by each user\n",
    "df.groupBy('userId').count().orderBy('count',ascending=False).show(10,False)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 348,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "+------+-----+\n",
      "|userId|count|\n",
      "+------+-----+\n",
      "|732   |20   |\n",
      "|636   |20   |\n",
      "|631   |20   |\n",
      "|926   |20   |\n",
      "|93    |20   |\n",
      "|596   |20   |\n",
      "|572   |20   |\n",
      "|34    |20   |\n",
      "|685   |20   |\n",
      "|300   |20   |\n",
      "+------+-----+\n",
      "only showing top 10 rows\n",
      "\n"
     ]
    }
   ],
   "source": [
    "#check number of ratings by each user\n",
    "df.groupBy('userId').count().orderBy('count',ascending=True).show(10,False)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 349,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "+-----------------------------+-----+\n",
      "|title                        |count|\n",
      "+-----------------------------+-----+\n",
      "|Star Wars (1977)             |583  |\n",
      "|Contact (1997)               |509  |\n",
      "|Fargo (1996)                 |508  |\n",
      "|Return of the Jedi (1983)    |507  |\n",
      "|Liar Liar (1997)             |485  |\n",
      "|English Patient, The (1996)  |481  |\n",
      "|Scream (1996)                |478  |\n",
      "|Toy Story (1995)             |452  |\n",
      "|Air Force One (1997)         |431  |\n",
      "|Independence Day (ID4) (1996)|429  |\n",
      "+-----------------------------+-----+\n",
      "only showing top 10 rows\n",
      "\n"
     ]
    }
   ],
   "source": [
    "#number of times movie been rated \n",
    "df.groupBy('title').count().orderBy('count',ascending=False).show(10,False)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 350,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "+-----------------------------------------+-----+\n",
      "|title                                    |count|\n",
      "+-----------------------------------------+-----+\n",
      "|Lashou shentan (1992)                    |1    |\n",
      "|Fear, The (1995)                         |1    |\n",
      "|Aiqing wansui (1994)                     |1    |\n",
      "|Mad Dog Time (1996)                      |1    |\n",
      "|Leopard Son, The (1996)                  |1    |\n",
      "|Next Step, The (1995)                    |1    |\n",
      "|Target (1995)                            |1    |\n",
      "|Vie est belle, La (Life is Rosey) (1987) |1    |\n",
      "|Modern Affair, A (1995)                  |1    |\n",
      "|JLG/JLG - autoportrait de d�cembre (1994)|1    |\n",
      "+-----------------------------------------+-----+\n",
      "only showing top 10 rows\n",
      "\n"
     ]
    }
   ],
   "source": [
    "df.groupBy('title').count().orderBy('count',ascending=True).show(10,False)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 291,
   "metadata": {},
   "outputs": [],
   "source": [
    "#import String indexer to convert string values to numeric values\n",
    "from pyspark.ml.feature import StringIndexer,IndexToString"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 292,
   "metadata": {},
   "outputs": [],
   "source": [
    "#creating string indexer to convert the movie title column values into numerical values\n",
    "stringIndexer = StringIndexer(inputCol=\"title\", outputCol=\"title_new\")"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 293,
   "metadata": {},
   "outputs": [],
   "source": [
    "#applying stringindexer object on dataframe movie title column\n",
    "model = stringIndexer.fit(df)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 294,
   "metadata": {},
   "outputs": [],
   "source": [
    "#creating new dataframe with transformed values\n",
    "indexed = model.transform(df)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 295,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "+------+--------------------+------+---------+\n",
      "|userId|               title|rating|title_new|\n",
      "+------+--------------------+------+---------+\n",
      "|   932|    Cape Fear (1991)|     3|    161.0|\n",
      "|   721|   Piano, The (1993)|     3|    173.0|\n",
      "|   642|Low Down Dirty Sh...|     2|   1115.0|\n",
      "|   798|That Darn Cat! (1...|     4|    686.0|\n",
      "|   535|African Queen, Th...|     4|    199.0|\n",
      "|   765|Stealing Beauty (...|     5|    521.0|\n",
      "|   927|Poison Ivy II (1995)|     3|   1041.0|\n",
      "|   544|    G.I. Jane (1997)|     3|    152.0|\n",
      "|   788|Godfather: Part I...|     4|    108.0|\n",
      "|   706|Birdcage, The (1996)|     4|     43.0|\n",
      "+------+--------------------+------+---------+\n",
      "only showing top 10 rows\n",
      "\n"
     ]
    }
   ],
   "source": [
    "#validate the numerical title values\n",
    "indexed.show(10)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 296,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "+---------+-----+\n",
      "|title_new|count|\n",
      "+---------+-----+\n",
      "|0.0      |583  |\n",
      "|1.0      |509  |\n",
      "|2.0      |508  |\n",
      "|3.0      |507  |\n",
      "|4.0      |485  |\n",
      "|5.0      |481  |\n",
      "|6.0      |478  |\n",
      "|7.0      |452  |\n",
      "|8.0      |431  |\n",
      "|9.0      |429  |\n",
      "+---------+-----+\n",
      "only showing top 10 rows\n",
      "\n"
     ]
    }
   ],
   "source": [
    "#number of times each numerical movie title has been rated \n",
    "indexed.groupBy('title_new').count().orderBy('count',ascending=False).show(10,False)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 297,
   "metadata": {},
   "outputs": [],
   "source": [
    "#split the data into training and test datatset\n",
    "train,test=indexed.randomSplit([0.75,0.25])"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 298,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "75104"
      ]
     },
     "execution_count": 298,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "#count number of records in train set\n",
    "train.count()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 299,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "24876"
      ]
     },
     "execution_count": 299,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "#count number of records in test set\n",
    "test.count()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 300,
   "metadata": {},
   "outputs": [],
   "source": [
    "#import ALS recommender function from pyspark ml library\n",
    "from pyspark.ml.recommendation import ALS"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 301,
   "metadata": {},
   "outputs": [],
   "source": [
    "#Training the recommender model using train datatset\n",
    "rec=ALS(maxIter=10,regParam=0.01,userCol='userId',itemCol='title_new',ratingCol='rating',nonnegative=True,coldStartStrategy=\"drop\")"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 302,
   "metadata": {},
   "outputs": [],
   "source": [
    "#fit the model on train set\n",
    "rec_model=rec.fit(train)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 303,
   "metadata": {},
   "outputs": [],
   "source": [
    "#making predictions on test set \n",
    "predicted_ratings=rec_model.transform(test)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 337,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "root\n",
      " |-- userId: integer (nullable = true)\n",
      " |-- title: string (nullable = true)\n",
      " |-- rating: integer (nullable = true)\n",
      " |-- title_new: double (nullable = false)\n",
      " |-- prediction: float (nullable = false)\n",
      "\n"
     ]
    }
   ],
   "source": [
    "#columns in predicted ratings dataframe\n",
    "predicted_ratings.printSchema()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 304,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "+------+--------------------+------+---------+----------+\n",
      "|userId|               title|rating|title_new|prediction|\n",
      "+------+--------------------+------+---------+----------+\n",
      "|    92|Tie Me Up! Tie Me...|     4|    766.0| 3.1512196|\n",
      "|   222|       Batman (1989)|     3|    116.0|  3.503284|\n",
      "|   178|Beauty and the Be...|     4|    114.0| 4.1487904|\n",
      "|   303|Jerry Maguire (1996)|     5|     15.0|  4.348913|\n",
      "|   134|      Flubber (1997)|     2|    579.0| 2.5635276|\n",
      "|   295|      Henry V (1989)|     4|    268.0| 4.2598643|\n",
      "|   889|Adventures of Pri...|     2|    305.0| 2.9040515|\n",
      "|   374| Men in Black (1997)|     3|     31.0|  3.602631|\n",
      "|   559|Killing Fields, T...|     4|    276.0|   4.55797|\n",
      "|   290|Star Trek: The Mo...|     1|    286.0| 3.2992659|\n",
      "+------+--------------------+------+---------+----------+\n",
      "only showing top 10 rows\n",
      "\n"
     ]
    }
   ],
   "source": [
    "#predicted vs actual ratings for test set \n",
    "predicted_ratings.orderBy(rand()).show(10)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 305,
   "metadata": {},
   "outputs": [],
   "source": [
    "#importing Regression Evaluator to measure RMSE\n",
    "from pyspark.ml.evaluation import RegressionEvaluator"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 306,
   "metadata": {},
   "outputs": [],
   "source": [
    "#create Regressor evaluator object for measuring accuracy\n",
    "evaluator=RegressionEvaluator(metricName='rmse',predictionCol='prediction',labelCol='rating')"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 307,
   "metadata": {},
   "outputs": [],
   "source": [
    "#apply the RE on predictions dataframe to calculate RMSE\n",
    "rmse=evaluator.evaluate(predictions)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 308,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "1.0293574739493354\n"
     ]
    }
   ],
   "source": [
    "#print RMSE error\n",
    "print(rmse)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 309,
   "metadata": {},
   "outputs": [],
   "source": [
    "#Recommend top movies  which user might like "
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 310,
   "metadata": {},
   "outputs": [],
   "source": [
    "#create dataset of all distinct movies \n",
    "unique_movies=indexed.select('title_new').distinct()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 311,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "1664"
      ]
     },
     "execution_count": 311,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "#number of unique movies\n",
    "unique_movies.count()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 312,
   "metadata": {},
   "outputs": [],
   "source": [
    "#assigning alias name 'a' to unique movies df\n",
    "a = unique_movies.alias('a')"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 336,
   "metadata": {},
   "outputs": [],
   "source": [
    "user_id=85"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 321,
   "metadata": {},
   "outputs": [],
   "source": [
    "#creating another dataframe which contains already watched movie by active user \n",
    "watched_movies=indexed.filter(indexed['userId'] == user_id).select('title_new').distinct()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 322,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "287"
      ]
     },
     "execution_count": 322,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "#number of movies already rated \n",
    "watched_movies.count()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 323,
   "metadata": {},
   "outputs": [],
   "source": [
    "#assigning alias name 'b' to watched movies df\n",
    "b=watched_movies.alias('b')"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 324,
   "metadata": {},
   "outputs": [],
   "source": [
    "#joining both tables on left join \n",
    "total_movies = a.join(b, a.title_new == b.title_new,how='left')\n"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 325,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "+---------+---------+\n",
      "|title_new|title_new|\n",
      "+---------+---------+\n",
      "|299.0    |null     |\n",
      "|558.0    |null     |\n",
      "|305.0    |305.0    |\n",
      "|596.0    |null     |\n",
      "|1051.0   |null     |\n",
      "|934.0    |null     |\n",
      "|496.0    |496.0    |\n",
      "|769.0    |null     |\n",
      "|692.0    |null     |\n",
      "|720.0    |null     |\n",
      "+---------+---------+\n",
      "only showing top 10 rows\n",
      "\n"
     ]
    }
   ],
   "source": [
    "total_movies.show(10,False)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 326,
   "metadata": {},
   "outputs": [],
   "source": [
    "#selecting movies which active user is yet to rate or watch\n",
    "remaining_movies=total_movies.where(col(\"b.title_new\").isNull()).select(a.title_new).distinct()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 327,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "1377"
      ]
     },
     "execution_count": 327,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "#number of movies user is yet to rate \n",
    "remaining_movies.count()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 328,
   "metadata": {},
   "outputs": [],
   "source": [
    "#adding new column of user_Id of active useer to remaining movies df \n",
    "remaining_movies=remaining_movies.withColumn(\"userId\",lit(int(user_id)))\n"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 329,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "+---------+------+\n",
      "|title_new|userId|\n",
      "+---------+------+\n",
      "|299.0    |85    |\n",
      "|558.0    |85    |\n",
      "|596.0    |85    |\n",
      "|1051.0   |85    |\n",
      "|934.0    |85    |\n",
      "|769.0    |85    |\n",
      "|692.0    |85    |\n",
      "|720.0    |85    |\n",
      "|576.0    |85    |\n",
      "|810.0    |85    |\n",
      "+---------+------+\n",
      "only showing top 10 rows\n",
      "\n"
     ]
    }
   ],
   "source": [
    "remaining_movies.show(10,False)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 333,
   "metadata": {},
   "outputs": [],
   "source": [
    "#making recommendations using ALS recommender model and selecting only top 'n' movies\n",
    "recommendations=rec_model.transform(remaining_movies).orderBy('prediction',ascending=False)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 332,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "+---------+------+----------+\n",
      "|title_new|userId|prediction|\n",
      "+---------+------+----------+\n",
      "|1433.0   |85    |4.9689837 |\n",
      "|1322.0   |85    |4.6927013 |\n",
      "|1271.0   |85    |4.605163  |\n",
      "|1470.0   |85    |4.5409293 |\n",
      "|705.0    |85    |4.532007  |\n",
      "+---------+------+----------+\n",
      "\n"
     ]
    }
   ],
   "source": [
    "recommendations.show(5,False)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 334,
   "metadata": {},
   "outputs": [],
   "source": [
    "#converting title_new values back to movie titles\n",
    "movie_title = IndexToString(inputCol=\"title_new\", outputCol=\"title\",labels=model.labels)\n",
    "\n",
    "final_recommendations=movie_title.transform(recommendations)\n"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 335,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "+---------+------+----------+----------------------------+\n",
      "|title_new|userId|prediction|title                       |\n",
      "+---------+------+----------+----------------------------+\n",
      "|1433.0   |85    |4.9689837 |Boys, Les (1997)            |\n",
      "|1322.0   |85    |4.6927013 |Faust (1994)                |\n",
      "|1271.0   |85    |4.605163  |Whole Wide World, The (1996)|\n",
      "|1470.0   |85    |4.5409293 |Some Mother's Son (1996)    |\n",
      "|705.0    |85    |4.532007  |Laura (1944)                |\n",
      "|303.0    |85    |4.5236835 |Close Shave, A (1995)       |\n",
      "|1121.0   |85    |4.4936523 |Crooklyn (1994)             |\n",
      "|1195.0   |85    |4.4636283 |Pather Panchali (1955)      |\n",
      "|285.0    |85    |4.456875  |Wrong Trousers, The (1993)  |\n",
      "|638.0    |85    |4.4495435 |Shall We Dance? (1996)      |\n",
      "+---------+------+----------+----------------------------+\n",
      "only showing top 10 rows\n",
      "\n"
     ]
    }
   ],
   "source": [
    "final_recommendations.show(10,False)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 338,
   "metadata": {},
   "outputs": [],
   "source": [
    "#create function to recommend top 'n' movies to any particular user\n",
    "def top_movies(user_id,n):\n",
    "    \"\"\"\n",
    "    This function returns the top 'n' movies that user has not seen yet but might like \n",
    "    \n",
    "    \"\"\"\n",
    "    #assigning alias name 'a' to unique movies df\n",
    "    a = unique_movies.alias('a')\n",
    "    \n",
    "    #creating another dataframe which contains already watched movie by active user \n",
    "    watched_movies=indexed.filter(indexed['userId'] == user_id).select('title_new')\n",
    "    \n",
    "    #assigning alias name 'b' to watched movies df\n",
    "    b=watched_movies.alias('b')\n",
    "    \n",
    "    #joining both tables on left join \n",
    "    total_movies = a.join(b, a.title_new == b.title_new,how='left')\n",
    "    \n",
    "    #selecting movies which active user is yet to rate or watch\n",
    "    remaining_movies=total_movies.where(col(\"b.title_new\").isNull()).select(a.title_new).distinct()\n",
    "    \n",
    "    \n",
    "    #adding new column of user_Id of active useer to remaining movies df \n",
    "    remaining_movies=remaining_movies.withColumn(\"userId\",lit(int(user_id)))\n",
    "    \n",
    "    \n",
    "    #making recommendations using ALS recommender model and selecting only top 'n' movies\n",
    "    recommendations=rec_model.transform(remaining_movies).orderBy('prediction',ascending=False).limit(n)\n",
    "    \n",
    "    \n",
    "    #adding columns of movie titles in recommendations\n",
    "    movie_title = IndexToString(inputCol=\"title_new\", outputCol=\"title\",labels=model.labels)\n",
    "    final_recommendations=movie_title.transform(recommendations)\n",
    "    \n",
    "    #return the recommendations to active user\n",
    "    return final_recommendations.show(n,False)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 339,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "+---------+------+----------+----------------------------+\n",
      "|title_new|userId|prediction|title                       |\n",
      "+---------+------+----------+----------------------------+\n",
      "|1433.0   |85    |4.9689837 |Boys, Les (1997)            |\n",
      "|1322.0   |85    |4.6927013 |Faust (1994)                |\n",
      "|1271.0   |85    |4.605163  |Whole Wide World, The (1996)|\n",
      "|1470.0   |85    |4.5409293 |Some Mother's Son (1996)    |\n",
      "|705.0    |85    |4.532007  |Laura (1944)                |\n",
      "|303.0    |85    |4.5236835 |Close Shave, A (1995)       |\n",
      "|1121.0   |85    |4.4936523 |Crooklyn (1994)             |\n",
      "|1195.0   |85    |4.4636283 |Pather Panchali (1955)      |\n",
      "|285.0    |85    |4.456875  |Wrong Trousers, The (1993)  |\n",
      "|638.0    |85    |4.4495435 |Shall We Dance? (1996)      |\n",
      "+---------+------+----------+----------------------------+\n",
      "\n"
     ]
    }
   ],
   "source": [
    "top_movies(85,10)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": []
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "Python 3",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.6.3"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 2
}
